#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2023/9/25 16:51
# @Author  : 王凯
# @File    : ningxia_grade.py
# @Project : spider-man
import datetime
import json
import re

import scrapy

from apps.creadit_grade_a.creadit_grade_a.items import NetCreditGradeAItem


class NingXiaGradeSpider(scrapy.Spider):
    """Spider that scrapes taxpayer records from the Ningxia e-tax site.

    Flow:
        1. ``start_requests`` loads the query page.
        2. ``parse_nd`` reads the list of selectable years embedded in that
           page and issues one POST query for each of the previous and the
           current calendar year.
        3. ``parse_detail`` turns every row of the JSON answer into a
           ``NetCreditGradeAItem`` (taxpayer id, company name, year, province).
    """

    name = "ningxia_grade"
    province = "宁夏"
    # Query page; its ``SwordPageData`` element carries the selectable years.
    url = "https://etax.ningxia.chinatax.gov.cn/sword?ctrl=AjnsxyjbNXCtrl_initView"
    # AJAX endpoint that answers the per-year query.
    query_url = "https://etax.ningxia.chinatax.gov.cn/ajax.sword?ctrl=AjnsxyjbNXCtrl_query"
    # Request classes kept as class attributes (presumably so that subclasses
    # or mixins can substitute their own request types).
    Request = scrapy.Request
    FormRequest = scrapy.FormRequest

    # Compiled once; used to strip every whitespace run from cell values.
    _WHITESPACE_RE = re.compile(r"\s+")

    def start_requests(self):
        """Yield the initial request for the query page."""
        yield self.Request(self.url, callback=self.parse_nd)

    def parse_nd(self, response, **kwargs):
        """Read the selectable years and request the recent ones.

        Only entries whose ``code`` equals the previous or the current
        calendar year are queried.

        Args:
            response: Response for the query page.
            **kwargs: Unused; accepted for callback compatibility.

        Yields:
            One POST request per matching year, handled by ``parse_detail``.
        """
        page_data = response.xpath('//div[@id="SwordPageData"]/@data').get()
        if not page_data:
            # Without the embedded data there is nothing to query; say so
            # explicitly instead of failing inside json.loads(None).
            self.logger.error("SwordPageData not found on %s", response.url)
            return
        nd_list = json.loads(page_data).get("data")[1].get("data")

        # Computed once so every entry is compared against the same years.
        this_year = datetime.datetime.now().year
        wanted_years = {str(this_year - 1), str(this_year)}

        for nd in nd_list or []:
            code = nd.get("code")
            if str(code) not in wanted_years:
                continue
            # URL-encoded ``postData`` JSON; only the ``pdnd`` value varies.
            body = f"postData=%7B%22tid%22%3A%22%22%2C%22ctrl%22%3A%22AjnsxyjbNXCtrl_query%22%2C%22data%22%3A%5B%7B%22name%22%3A%22cxForm%22%2C%22data%22%3A%7B%22nsrsbh%22%3A%7B%22value%22%3A%22%22%7D%2C%22nsrmc%22%3A%7B%22value%22%3A%22%22%7D%2C%22pdnd%22%3A%7B%22value%22%3A%22{code}%22%7D%2C%22dq%22%3A%7B%22value%22%3A%22%22%7D%7D%2C%22sword%22%3A%22SwordForm%22%7D%5D%2C%22bindParam%22%3Atrue%7D+"
            yield self.FormRequest(
                self.query_url,
                body=body,
                method="POST",
                callback=self.parse_detail,
                headers={"Content-Type": "application/x-www-form-urlencoded"},
            )

    def parse_detail(self, response, **kwargs):
        """Convert the rows of one query answer into items.

        Args:
            response: JSON response of the per-year query.
            **kwargs: Unused; accepted for callback compatibility.

        Yields:
            A ``NetCreditGradeAItem`` per row that carries a ``tds`` mapping.
        """
        datas = response.json().get("data")
        if not datas:
            return
        for row in datas[0].get("trs") or []:
            tds = row.get("tds")
            if not tds:
                # A row without cells has nothing to extract; skipping it
                # keeps the remaining rows of the page from being lost.
                continue
            item = NetCreditGradeAItem()
            item.taxpayer_id = self._cell_text(tds, "shxydm")
            item.company_name = self._cell_text(tds, "nsrmc")
            item.year = self._cell_text(tds, "pdNd")
            item.province = self.province
            yield item

    def _cell_text(self, tds, key):
        """Return the ``value`` of cell ``key`` with all whitespace removed.

        A missing cell or a missing/``None`` value yields an empty string so
        that one incomplete cell does not abort the whole page.
        """
        value = (tds.get(key) or {}).get("value")
        if value is None:
            return ""
        return self._WHITESPACE_RE.sub("", str(value))


def run():
    """Launch this spider through Scrapy's command-line entry point."""
    # Imported lazily so that merely importing this module does not pull in
    # the command-line machinery.
    from scrapy import cmdline

    command = "scrapy crawl ningxia_grade"
    argv = command.split()
    cmdline.execute(argv)


# Allow running this file directly as a script to start the crawl.
if __name__ == "__main__":
    run()
